<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html;charset=UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
<title>Reranker Framework (ReFr): hadoop-run.py File Reference</title>

<link href="tabs.css" rel="stylesheet" type="text/css"/>
<link href="doxygen.css" rel="stylesheet" type="text/css" />

<link href="search/search.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="jquery.js"></script>
<script type="text/javascript" src="search/search.js"></script>
<script type="text/javascript">
  $(document).ready(function() { searchBox.OnSelectItem(0); });
</script>

</head>
<body>
<div id="top"><!-- do not remove this div! -->


<div id="titlearea">
<table cellspacing="0" cellpadding="0">
 <tbody>
 <tr style="height: 56px;">
  
  
  <td style="padding-left: 0.5em;">
   <div id="projectname">Reranker Framework (ReFr)
   
   </div>
   <div id="projectbrief">Reranking framework for structure prediction and discriminative language modeling</div>
  </td>
  
  
  
 </tr>
 </tbody>
</table>
</div>

<!-- Generated by Doxygen 1.7.6.1 -->
<script type="text/javascript">
var searchBox = new SearchBox("searchBox", "search",false,'Search');
</script>
  <div id="navrow1" class="tabs">
    <ul class="tablist">
      <li><a href="index.html"><span>Main&#160;Page</span></a></li>
      <li><a href="namespaces.html"><span>Namespaces</span></a></li>
      <li><a href="annotated.html"><span>Classes</span></a></li>
      <li class="current"><a href="files.html"><span>Files</span></a></li>
      <li>
        <div id="MSearchBox" class="MSearchBoxInactive">
        <span class="left">
          <img id="MSearchSelect" src="search/mag_sel.png"
               onmouseover="return searchBox.OnSearchSelectShow()"
               onmouseout="return searchBox.OnSearchSelectHide()"
               alt=""/>
          <input type="text" id="MSearchField" value="Search" accesskey="S"
               onfocus="searchBox.OnSearchFieldFocus(true)" 
               onblur="searchBox.OnSearchFieldFocus(false)" 
               onkeyup="searchBox.OnSearchFieldChange(event)"/>
          </span><span class="right">
            <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
          </span>
        </div>
      </li>
    </ul>
  </div>
  <div id="navrow2" class="tabs2">
    <ul class="tablist">
      <li><a href="files.html"><span>File&#160;List</span></a></li>
      <li><a href="globals.html"><span>File&#160;Members</span></a></li>
    </ul>
  </div>
</div>
<div class="header">
  <div class="summary">
<a href="#namespaces">Namespaces</a> &#124;
<a href="#var-members">Variables</a>  </div>
  <div class="headertitle">
<div class="title">hadoop-run.py File Reference</div>  </div>
</div><!--header-->
<div class="contents">

<p>A Python program which will train a reranking model on a Hadoop cluster using the Iterative Parameter Mixtures perceptron training algorithm.
<a href="#details">More...</a></p>

<p><a href="hadoop-run_8py_source.html">Go to the source code of this file.</a></p>
<table class="memberdecls">
<tr><td colspan="2"><h2><a name="namespaces"></a>
Namespaces</h2></td></tr>
<tr><td class="memItemLeft" align="right" valign="top">namespace &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html">hadoop-run</a></td></tr>
<tr><td colspan="2"><h2><a name="var-members"></a>
Variables</h2></td></tr>
<tr><td class="memItemLeft" align="right" valign="top">tuple&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#a70a90c07b0d4bb4dff7d8912d00fc5cf">hadoop-run.optParse</a> = OptionParser()</td></tr>
<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">The following arguments are available to <a class="el" href="hadoop-run_8py.html" title="A python program which will train a reranking model on a Hadoop cluster using the Iterative Parameter...">hadoop-run.py</a>.  <a href="#a70a90c07b0d4bb4dff7d8912d00fc5cf"></a><br/></td></tr>
<tr><td class="memItemLeft" align="right" valign="top">string&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#a3935d70733fa64c4ad3b0526a8ced2b9">hadoop-run.help</a> = &quot;Location of hadoop installation. If not set, &quot;</td></tr>
<tr><td class="memItemLeft" align="right" valign="top">string&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#ac6a679f329c1e5434af2aabdfbfa610a">hadoop-run.default</a> = &quot;&quot;</td></tr>
<tr><td class="memItemLeft" align="right" valign="top">string&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#ab12a73005bb25b5a9695d359571a3b3e">hadoop-run.action</a> = &quot;append&quot;</td></tr>
<tr><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#af439498523072fc119fa5ed248f399e9">hadoop-run.hadooproot</a> = options.hadooproot</td></tr>
<tr><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#af780d860089b1ed86b289118d7bb8e09">hadoop-run.streamingloc</a> = options.streamingloc</td></tr>
<tr><td class="memItemLeft" align="right" valign="top">string&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#a3bec14631fcfd1766537753d2de16d78">hadoop-run.tmppath</a> = &quot;/contrib/streaming&quot;</td></tr>
<tr><td class="memItemLeft" align="right" valign="top">tuple&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#a9c952cd08d17abdf2b70ff7db0124ca2">hadoop-run.streamingjar</a> = glob.glob(tmppath + &quot;/hadoop-streaming*.jar&quot;)</td></tr>
<tr><td class="memItemLeft" align="right" valign="top">list&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#a0cbddcac92897f1138fbf5874a467125">hadoop-run.filenames</a> = []</td></tr>
<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Collect input filenames.  <a href="#a0cbddcac92897f1138fbf5874a467125"></a><br/></td></tr>
<tr><td class="memItemLeft" align="right" valign="top">tuple&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#a0147327a948ba744feacf4311d120624">hadoop-run.hdproc</a></td></tr>
<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Create output directory if it does not exist.  <a href="#a0147327a948ba744feacf4311d120624"></a><br/></td></tr>
<tr><td class="memItemLeft" align="right" valign="top">string&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#a9835c3ab1482d09301f4b658c28fe2cc">hadoop-run.train_map_options</a> = &quot;&quot;</td></tr>
<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Configuration for training options. Options passed to the mapper binary.  <a href="#a9835c3ab1482d09301f4b658c28fe2cc"></a><br/></td></tr>
<tr><td class="memItemLeft" align="right" valign="top">string&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#a7dad76e7434cf8a25ac583bd9606aa12">hadoop-run.train_files</a> = &quot;&quot;</td></tr>
<tr><td class="memItemLeft" align="right" valign="top">tuple&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#a676e472b0b6770ad4b06a6582b24a5a0">hadoop-run.train_map</a></td></tr>
<tr><td class="memItemLeft" align="right" valign="top">string&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#a66112611af16f304a90d19eeb7807faf">hadoop-run.extractsym_map</a> = &quot;'&quot;</td></tr>
<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Shortcuts to command-line programs.  <a href="#a66112611af16f304a90d19eeb7807faf"></a><br/></td></tr>
<tr><td class="memItemLeft" align="right" valign="top">string&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#a9f3638a00a52bea919f962b4e3dcbf22">hadoop-run.compiledata_map</a> = &quot;'&quot;</td></tr>
<tr><td class="memItemLeft" align="right" valign="top">string&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#a8dd932a372d394533574108c5a6891ff">hadoop-run.train_reduce</a> = &quot;/model-merge-reducer&quot;</td></tr>
<tr><td class="memItemLeft" align="right" valign="top">string&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#a466b974ea2319c752ee42a6c7fc26e7d">hadoop-run.train_recomb</a> = &quot;/model-combine-shards&quot;</td></tr>
<tr><td class="memItemLeft" align="right" valign="top">string&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#a7cd180be7125e1b036878f420c5f8a6b">hadoop-run.symbol_recomb</a> = &quot;/model-combine-symbols&quot;</td></tr>
<tr><td class="memItemLeft" align="right" valign="top">string&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#a50dc78a9f0ff850a3cc00a02f5584c43">hadoop-run.pipeeval_options</a> = &quot;&quot;</td></tr>
<tr><td class="memItemLeft" align="right" valign="top">string&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#a56e2a3b19a01270934cd4f828db159d0">hadoop-run.pipeeval</a> = &quot;/piped-model-evaluator&quot;</td></tr>
<tr><td class="memItemLeft" align="right" valign="top">string&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#aac9abae550160bbf25c42395852a2730">hadoop-run.hadoop_inputfiles</a> = &quot;&quot;</td></tr>
<tr><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#a9342c00affa15bcdc88429e2d4d69e83">hadoop-run.precompdevfile</a> = options.develdata</td></tr>
<tr><td class="mdescLeft">&#160;</td><td class="mdescRight">Precompilation of string features.  <a href="#a9342c00affa15bcdc88429e2d4d69e83"></a><br/></td></tr>
<tr><td class="memItemLeft" align="right" valign="top">string&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#ae78a065b3eb608a5856211edb4ee7a1a">hadoop-run.symbol_dir</a> = &quot;/Symbols/&quot;</td></tr>
<tr><td class="memItemLeft" align="right" valign="top">string&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#a2f9e77fd2ea474bf35a773b8bf469d87">hadoop-run.precomp_dir</a> = &quot;/Precompiled/&quot;</td></tr>
<tr><td class="memItemLeft" align="right" valign="top">string&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#a82ea895544777ec4d6e3bc944fa0c137">hadoop-run.precompdev_dir</a> = &quot;/PrecompiledDev/&quot;</td></tr>
<tr><td class="memItemLeft" align="right" valign="top">string&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#a36c4796df68329c6bd8fcdd6b45af5ff">hadoop-run.addl_data</a> = &quot;&quot;</td></tr>
<tr><td class="memItemLeft" align="right" valign="top">string&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#a5564609b5b72f32ec12a12fc1540dae8">hadoop-run.symfile_name</a> = &quot;/&quot;</td></tr>
<tr><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#a710f4968870f4e191ea575db56a192e7">hadoop-run.cur_model</a> = options.inputmodel</td></tr>
<tr><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#a7ef5e1b38b72801a5fc43f89e933cc90">hadoop-run.converged</a> = False</td></tr>
<tr><td class="memItemLeft" align="right" valign="top">tuple&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#acfb0d4d3206b9770b9d7d3fc4040a401">hadoop-run.iteration</a> = int(options.startiter)</td></tr>
<tr><td class="memItemLeft" align="right" valign="top">int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#a5e1715f08c5580daaa35b07acac7fd8f">hadoop-run.prev_loss</a> = 9999</td></tr>
<tr><td class="memItemLeft" align="right" valign="top">list&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#a0b1f35d422c2531b6aac501a4a0fbe34">hadoop-run.loss_history</a> = []</td></tr>
<tr><td class="memItemLeft" align="right" valign="top">int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#af6f26eafc2026ce792a3037cc01146c8">hadoop-run.num_in_decline</a> = 0</td></tr>
<tr><td class="memItemLeft" align="right" valign="top">int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#ae00c11b0ec784431694519d4ef5035d7">hadoop-run.best_loss_index</a> = 0</td></tr>
<tr><td class="memItemLeft" align="right" valign="top">string&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#a1bb9db73c46fb39ab3c03026a461d1c8">hadoop-run.eval_cmd</a> = &quot; -d &quot;</td></tr>
<tr><td class="memItemLeft" align="right" valign="top">tuple&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#a96482b5b1d7e80ee6061d559e622a402">hadoop-run.evalio</a> = <a class="el" href="classpyutil_1_1_command_i_o.html">pyutil.CommandIO</a>(eval_cmd)</td></tr>
<tr><td class="memItemLeft" align="right" valign="top">string&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#a29c7856a0d1d420fa831801bd72aaac2">hadoop-run.iter_str</a> = &quot;'&quot;</td></tr>
<tr><td class="memItemLeft" align="right" valign="top">string&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#a88eb645c6d5e451590df140f0a1ce8fd">hadoop-run.model_output</a> = &quot;/&quot;</td></tr>
<tr><td class="memItemLeft" align="right" valign="top">string&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#a696c38c00767bd4846d4a8f312f521ea">hadoop-run.proc_cmd</a> = &quot; -o &quot;</td></tr>
<tr><td class="memItemLeft" align="right" valign="top">int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#ab913a16649b25375ec6afac29d20b21b">hadoop-run.devtest_score</a> = 0</td></tr>
<tr><td class="memItemLeft" align="right" valign="top">float&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#acccc808b1c10830f49ff1b785c460ddd">hadoop-run.loss</a> = 0.0</td></tr>
<tr><td class="memItemLeft" align="right" valign="top">list&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacehadoop-run.html#a44e02ad404f4fa147620724c270dd9a6">hadoop-run.diff</a> = loss_history[-1]</td></tr>
</table>
<hr/><a name="details" id="details"></a><h2>Detailed Description</h2>
<div class="textblock"><p>A Python program which will train a reranking model on a Hadoop cluster using the Iterative Parameter Mixtures perceptron training algorithm. </p>
<p>You must first have a Hadoop account configured. In order to train, you will need to have the following:</p>
<ul>
<li>Training data locally accessible (accessible by the script)</li>
<li>A HadoopFS (HDFS) directory with enough space to store the input training data, the intermediate models and the final model.</li>
</ul>
<p>The program will attempt to locate the Hadoop binary and the Hadoop streaming library. If this fails, you can specify these via command-line parameters (--hadooproot and --streamingloc).</p>
<p>Usage: <a class="el" href="hadoop-run_8py.html" title="A python program which will train a reranking model on a Hadoop cluster using the Iterative Parameter...">hadoop-run.py</a> --input InputData --hdfsinputdir HDFSInDir --hdfsoutputdir HDFSOutDir --outputdir OutputDir</p>
<p>InputData - A comma-separated list of file globs containing the training data. These must be accessible by the script. OutputDir - The local directory where the trained model(s) are written. The default model name is 'model'. You can change this using the --modelname command-line parameter. HDFSInDir - A directory on HDFS where the input data will be copied to. HDFSOutDir - A directory on HDFS where the temporary data and output data will be written to. The final models are copied to the locally-accessible OutputDir.</p>
<p>Check input command line options. </p>
<dl class="author"><dt><b>Author:</b></dt><dd><a href="mailto:kbhall@google.com">kbhall@google.com</a> (Keith Hall) </dd></dl>

<p>Definition in file <a class="el" href="hadoop-run_8py_source.html">hadoop-run.py</a>.</p>
</div></div><!-- contents -->
<!-- window showing the filter options -->
<div id="MSearchSelectWindow"
     onmouseover="return searchBox.OnSearchSelectShow()"
     onmouseout="return searchBox.OnSearchSelectHide()"
     onkeydown="return searchBox.OnSearchSelectKey(event)">
<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Classes</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Namespaces</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(3)"><span class="SelectionMark">&#160;</span>Files</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(4)"><span class="SelectionMark">&#160;</span>Functions</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(5)"><span class="SelectionMark">&#160;</span>Variables</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(6)"><span class="SelectionMark">&#160;</span>Typedefs</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(7)"><span class="SelectionMark">&#160;</span>Enumerations</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(8)"><span class="SelectionMark">&#160;</span>Enumerator</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(9)"><span class="SelectionMark">&#160;</span>Friends</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(10)"><span class="SelectionMark">&#160;</span>Defines</a></div>

<!-- iframe showing the search results (closed by default) -->
<div id="MSearchResultsWindow">
<iframe src="javascript:void(0)" frameborder="0" 
        name="MSearchResults" id="MSearchResults">
</iframe>
</div>



<hr class="footer"/><address class="footer"><small>
Generated on Tue Apr 9 2013 11:56:29 for Reranker Framework (ReFr) by &#160;<a href="http://www.doxygen.org/index.html">
<img class="footer" src="doxygen.png" alt="doxygen"/>
</a> 1.7.6.1
</small></address>

</body>
</html>
